# plotly standard imports
import plotly.graph_objs as go
import chart_studio.plotly as py
# Cufflinks wrapper on plotly
import cufflinks
# Data science imports
import pandas as pd
import numpy as np
# Notebook setup: display options, offline plotting, and dataset loading.
# Show up to 30 columns when a DataFrame is rendered.
pd.options.display.max_columns = 30
# Display the value of every expression statement in a cell, not only the
# last one (later cells rely on this to show several values at once).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
from plotly.offline import iplot, init_notebook_mode
# Switch cufflinks and plotly to offline notebook rendering.
# NOTE(review): connected=True presumably loads plotly.js from a CDN, so
# rendering needs network access - confirm against the installed versions.
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)
# Set global theme
# NOTE(review): set_config_file appears to persist these settings to the
# user's cufflinks config, and world_readable=True presumably marks uploaded
# charts as public - confirm this is intended.
cufflinks.set_config_file(world_readable=True, theme='pearl')
# Project helper that returns the train and test DataFrames.
from src.prepare_datasets import get_prepared_datasets
train_df, test_df = get_prepared_datasets()
# Preview the first rows of the training frame.
train_df.head()
| | high | low | open | close | volume | H-L | H-PC | L-PC | MACD | Stochastics Oscillator | ATR |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.006160 | 0.006157 | 0.006160 | 0.006157 | 0.009251 | 0.000242 | 0.000000 | 0.000244 | 0.000820 | 0.984296 | 0.000242 |
| 1 | 0.006160 | 0.006157 | 0.006158 | 0.006160 | 0.044149 | 0.000242 | 0.000282 | 0.000000 | 0.000644 | 0.980392 | 0.000242 |
| 2 | 0.006162 | 0.006157 | 0.006160 | 0.006162 | 0.050928 | 0.000459 | 0.000252 | 0.000244 | 0.000498 | 1.000000 | 0.000459 |
| 3 | 0.006162 | 0.006157 | 0.006162 | 0.006157 | 0.004259 | 0.000476 | 0.000000 | 0.000479 | 0.000279 | 0.933750 | 0.000476 |
| 4 | 0.006162 | 0.006155 | 0.006157 | 0.006156 | 0.004304 | 0.000649 | 0.000533 | 0.000192 | 0.000070 | 0.918750 | 0.000649 |
# Plot every 60th row (starting at row index 59), one subplot per column.
# NOTE(review): the stride presumably down-samples the data to keep the
# interactive figure light - confirm the intended sampling interval.
train_df[59::60].iplot(subplots=True)
# Print the frame summary: index range, column dtypes and memory usage.
train_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1815724 entries, 0 to 1815723 Data columns (total 11 columns): # Column Dtype --- ------ ----- 0 high float64 1 low float64 2 open float64 3 close float64 4 volume float64 5 H-L float64 6 H-PC float64 7 L-PC float64 8 MACD float64 9 Stochastics Oscillator float64 10 ATR float64 dtypes: float64(11) memory usage: 152.4 MB
# Name of the column used as the prediction label throughout the notebook.
target_column = 'close'
# Project helper that slices the frames into (inputs, labels) windows.
from src.window_generator import WindowGenerator
# 24 input steps and a single label step; the shift of 24 places the label
# 24 steps after the last input (total window of 48 rows, label at index 47,
# as the repr printed for this object shows).
w1 = WindowGenerator(
input_width=24, label_width=1, shift=24,
train_df=train_df, test_df=test_df,
label_columns=[target_column]
)
# Display the window description.
w1
Total window size: 48 Input indices: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23] Label indices: [47] Label column name(s): ['close']
# Plot example windows for the target column.
# NOTE(review): WindowGenerator.plot is a project helper; the "n ... from 3"
# lines it prints look like leftover debug output - confirm and remove there.
w1.plot(plot_col=target_column)
n 0 from 3 n 1 from 3 n 2 from 3
# Inspect the (inputs, labels) tensor specs of the training dataset to check
# the batch/time/feature layout before building models.
w1.train.element_spec
(TensorSpec(shape=(None, 24, 11), dtype=tf.float32, name=None), TensorSpec(shape=(None, 1, 1), dtype=tf.float32, name=None))
# One input step and one label step, with the label one step after the input
# (total window of 2 rows, per the repr printed for this object).
single_step_window = WindowGenerator(
input_width=1, label_width=1, shift=1,
train_df=train_df, test_df=test_df,
label_columns=[target_column])
# Display the window description.
single_step_window
Total window size: 2 Input indices: [0] Label indices: [1] Label column name(s): ['close']
import tensorflow as tf
from src.BaselineModel import Baseline
# Map every column name of the training frame to its positional index.
column_indices = dict(zip(train_df.columns, range(len(train_df.columns))))

# The baseline model is told which feature position holds the target column.
target_index = column_indices[target_column]
baseline = Baseline(label_index=target_index)

# Loss and metrics are built up front so the compile call stays short.
baseline_loss = tf.losses.MeanSquaredError()
baseline_metrics = [
    tf.metrics.MeanAbsoluteError(),
    tf.metrics.MeanSquaredLogarithmicError(),
]
baseline.compile(loss=baseline_loss, metrics=baseline_metrics)

# Score the baseline on the single-step test windows; the returned list of
# loss and metric values is displayed as the cell output.
baseline.evaluate(single_step_window.test, verbose=1)
227029/227029 [==============================] - 248s 1ms/step - loss: 2.0541e-08 - mean_absolute_error: 5.4025e-05 - mean_squared_logarithmic_error: 1.2958e-08
[1.1815568967676882e-07, 0.00014772806025575846, 6.14403390386542e-08]
# 32 input steps and 32 label steps with a shift of 1, so each label sits one
# step after its input (total window of 33 rows, per the repr printed for
# this object).
wide_window = WindowGenerator(
input_width=32, label_width=32, shift=1,
train_df=train_df, test_df=test_df,
label_columns=[target_column])
# Display the window description.
wide_window
Total window size: 33 Input indices: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31] Label indices: [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32] Label column name(s): ['close']
# Report the shape of an example input batch from the wide window.
input_shape = wide_window.example[0].shape
print('Input shape:', input_shape)
# Report the shape the baseline produces for that same kind of batch.
output_shape = baseline(wide_window.example[0]).shape
print('Output shape:', output_shape)
Input shape: (33, 32, 11) Output shape: (33, 32, 1)
# Plot example wide windows, passing the baseline model to the plot helper.
# NOTE(review): presumably this overlays the model's predictions on the
# labels - confirm in WindowGenerator.plot.
wide_window.plot(baseline)
n 0 from 3 len(inputs) 33 n 1 from 3 len(inputs) 33 n 2 from 3 len(inputs) 33
# Show how many rows the training frame holds.
train_row_count = len(train_df)
train_row_count

# Batch size and the row span of one full window (inputs plus shift).
batch_size = 8
full_window_width = 33

# Number of whole (window width x batch size) row groups in the training data.
# NOTE(review): this treats windows as non-overlapping - confirm that matches
# how WindowGenerator strides over the frame.
train_rows_per_batch = full_window_width * batch_size
train_delimetor = train_row_count // train_rows_per_batch
train_delimetor
1815724
6877
# Show how many rows the test frame holds.
test_row_count = len(test_df)
test_row_count

# Number of whole (window width x batch size) row groups in the test data,
# using the width and batch size set for the training estimate.
test_rows_per_batch = full_window_width * batch_size
test_delimetor = test_row_count // test_rows_per_batch
test_delimetor
454058
1719